# Read the raw SPSS data set and export it as CSV so that the Python
# translation step below can consume it.
path <- "./../../data/PPD_hairCortisol_PlosOne_.sav"
# Per the original author's note, the SPSS metadata is dropped along the way;
# open the .sav file in SPSS to inspect it.
dataset <- read_sav(path)
write.csv(dataset, file = "./../../data/data_span.csv")
# ---- Python chunk: translate the column names ----
import googletrans as gt
import pandas as pd

# Load the CSV exported from R.
# NOTE(review): the column listing printed later in this document contains
# mojibake (e.g. "IDEACIÃ.NPARANOIDE1"), so one of the encodings used in the
# R -> Python -> R round trip is probably wrong -- confirm which one.
data = pd.read_csv('./../../data/data_span.csv', encoding="ISO-8859-1")
trans = gt.Translator()

# Translate every column name from Spanish to English, log each
# original/translated pair to a CSV file, and rename the column in place.
with open("./../../data/data_programatic_translation.csv", 'w') as f:
    f.write('original, translated\n')
    for old_col in data.columns.values:
        new_col = trans.translate(old_col, src='spanish', dest='en').text
        f.write(old_col + ',' + new_col + '\n')
        data.rename(columns={old_col: new_col}, inplace=True)

# Save the data set with translated column names for the R analysis.
data.to_csv("./../../data/data_eng.csv")
# Paper Variables
# Load the data set with translated column names and list its columns.
df <- read.csv(file = "./../../data/data_eng.csv", header = TRUE)
colnames(df)
## [1] "X" "Unnamed..0"
## [3] "Competitor" "Age"
## [5] "FirstPregnancy" "Pregnancy.type"
## [7] "Level_Studies" "SLaboral"
## [9] "Appearance" "Sport"
## [11] "Profession" "What.study"
## [13] "Origin" "EPDS"
## [15] "depreposparto" "StateCivilDIcotomica"
## [17] "EmploymentSituationDico" "PetDicotomica"
## [19] "NationalityDicotomica" "PregnancyMethodDico"
## [21] "PreviousMiscarriagesDico" "WantedPregnancyDico"
## [23] "Occupation123" "LNCORTISOL1"
## [25] "LNCORTISOL2" "LNCORTISOL3"
## [27] "SexFetal" "PDQ1"
## [29] "PDQ2" "PDQ3"
## [31] "EEP1" "EEP2"
## [33] "EEP3" "SOMATIZATIONS1"
## [35] "OBSESSIONS.AND.COMPULSIONS1" "SENSITIVIDADINTERPERSONAL1"
## [37] "DEPRESSION1" "ANXIETY1"
## [39] "HOSTILIDAD1" "ANSIEDADFOBICA1"
## [41] "IDEACIÃ.NPARANOIDE1" "PSICOTICISMO1"
## [43] "IGS1" "SP1"
## [45] "PSDI1" "SOMATIZATIONS2"
## [47] "OBSESSIONS.AND.COMPULSIONS2" "INSTRUMENT.SENSITIVITY2"
## [49] "DEPRESSION2" "ANXIETY2"
## [51] "HOSTILITY2" "ANSIEDADFOBICA2"
## [53] "IDEACIÃ.NPARANOIDE2" "PSICOTICISMO2"
## [55] "IGS2" "SP2"
## [57] "PSDI2" "SOMATIZATIONS3"
## [59] "OBSESSIONS.AND.COMPULSIONS3" "INSTRUMENT.SENSITIVITY3"
## [61] "DEPRESSION3" "ANXIETY3"
## [63] "HOSTILIDAD3" "ANSIEDADFOBICA3"
## [65] "IDEACIÃ.NPARANOIDE3" "PSICOTICISMO3"
## [67] "IGS3" "SP3"
## [69] "PSDI3" "SexFetalDico"
## [71] "Cortisol1" "Cortisol2"
## [73] "Cortisol3"
# This is where we can drop unused variables, rename columns and add metadata
# describing the variables (column attributes).
# Per the original note, 29 empty rows were brought in from SPSS.
# NOTE(review): na.omit() removes every row that has an NA in *any* column,
# not only rows that are entirely empty -- confirm that this is intended.
before <- nrow(df)
df <- na.omit(df)
after <- nrow(df)
# Report the count as a positive number (rows before minus rows after).
print(paste("rows dropped:", before - after))
## [1] "rows dropped: 29"
# ---- Demographic and pregnancy variables ----
# Each variable is copied from its translated SPSS column into a snake_case
# column, and a "shortDescription" attribute is attached to document it.

df <- df %>% mutate(age = Age)
attr(df$age, "shortDescription") <- "The age of the mother"

# NOTE(review): the source column is the dichotomised NationalityDicotomica,
# while the description lists ten country codes (which appear to belong to
# the Origin column) -- confirm which column is intended.
df <- df %>% mutate(nationality = NationalityDicotomica)
attr(df$nationality, "shortDescription") <- "The patient's nationality or country of origin at the time of pregnancy
[1 = Spain, 2 = Argentina, 3 = Chile, 4 = Morocco, 5 = Romania, 6 = Germany, 7 = Russia, 8 = Ecuador, 9 = Peru, 10 = Paraguay]"

# df <- df %>% mutate('Marital_Status' = ) # CAN'T FIND THIS ONE
# NOTE(review): StateCivilDIcotomica looks like a candidate -- confirm.

# NOTE(review): the source column is the dichotomised EmploymentSituationDico,
# while the description lists five codes (which appear to belong to SLaboral)
# -- confirm which column is intended.
df <- df %>% mutate(employed = EmploymentSituationDico)
attr(df$employed, "shortDescription") <- "Employment situation; is or is not employed at time of pregnancy [1 = unemployed, 2 = full-time job, 3 = half-day job, 4 = student, 5 = work and study]"

df <- df %>% mutate(occupation = Profession)
attr(df$occupation, "shortDescription") <- "If employed, what was the mothers occupation at the time of pregnancy [1 = unemployment, 2 = Head of nurse, 3 = Doctor, 4 = Nurse, 5 = housewife, 6 = teacher, 7 = management, 8 = dental clinic, 9 = banking, 10 = untrained, 11 = psychologist, 12 = engineer]"

df <- df %>% mutate(education_level = Level_Studies)
attr(df$education_level, "shortDescription") <- "The highest level of education that the mother had received at the time of pregnancy/study :
1 = primary
2 = secondary
3 = university students
4 = without studies
"

df <- df %>% mutate(sport = Sport)
attr(df$sport, "shortDescription") <- "tracking the physical activity of the mother; whether the mother played a sport (during pregnancy) [y/n] :
0 = No
1 = Yes"

df <- df %>% mutate(pet = PetDicotomica)
attr(df$pet, "shortDescription") <- "designation for mothers with pets at time of pregnancy"

# The original author was unsure that Appearance is the dyed-hair variable;
# this still needs to be checked.
df <- df %>% mutate(dyed_hair = Appearance)
attr(df$dyed_hair, "shortDescription") <- "designation for mothers with dyed hair during pregnancy :
0 = dyed
1 = natural"

df <- df %>% mutate(first_pregnancy = FirstPregnancy)
attr(df$first_pregnancy, "shortDescription") <- "primiparous; designation for mothers who are having a child for the first time"

df <- df %>% mutate(wanted_pregnancy = WantedPregnancyDico)
attr(df$wanted_pregnancy, "shortDescription") <- "Designation for mothers who desired the pregnancy (planned?)"

# NOTE(review): the source column is the dichotomised PregnancyMethodDico,
# while the description lists three codes (which appear to belong to
# Pregnancy.type) -- confirm which column is intended.
# Open question from the original author: is "spontaneous" the accepted term?
df <- df %>% mutate(pregnancy_method = PregnancyMethodDico)
attr(df$pregnancy_method, "shortDescription") <- "designation for *spontaneous* method of fertilization and *Fertility Treatment* or artificial insemination :
1 = spontaneously and normally
2 = in vitro fertilization
3 = artificial insemination"

df <- df %>% mutate(previous_miscarriage = PreviousMiscarriagesDico)
attr(df$previous_miscarriage, "shortDescription") <- "Designation for mothers who have had a miscarriage prior to the current pregnancy"

# df <- df %>% mutate('delivery' = ) # CAN'T FIND DELIVERY AND LABOR
# df <- mutate('delivery_antisthesia' = ) # Not sure what this one is

df <- df %>% mutate(fetus_sex = SexFetalDico)
attr(df$fetus_sex, "shortDescription") <- "the sex of the fetus" # TODO: numeric mapping is unknown
# ---- Depression metrics ----
# Post-partum depression, copied from the original depreposparto column.
df <- df %>% mutate(postpartum_depression = depreposparto)
attr(df$postpartum_depression, "shortDescription") <- "Diagnosis of post-partum depression in the mother:
1 = sindepre?
2 = condepre?" # TODO: confirm these labels; are there more levels?

# Depression scores, one column per trimester.
df <- df %>% mutate(
  depression_tri1 = DEPRESSION1,
  depression_tri2 = DEPRESSION2,
  depression_tri3 = DEPRESSION3
)
attr(df$depression_tri1, "shortDescription") <- "antenatal depression during the 1st trimester."
attr(df$depression_tri2, "shortDescription") <- "antenatal depression during the 2nd trimester."
attr(df$depression_tri3, "shortDescription") <- "antenatal depression during the 3rd trimester."

# EPDS score. TODO: document the measurement name and type.
df <- df %>% mutate(epds = EPDS)
attr(df$epds, "shortDescription") <- "continuous metric of postnatal depression"
# ---- Cortisol metrics ----
# The data set also contains LNCORTISOL1-3, described by the original author
# as a natural-log version of the plain cortisol metric.
# TODO: confirm the units and which of the two metrics should be used.
df <- df %>% mutate(
  cortisol_tri1 = Cortisol1,
  cortisol_tri2 = Cortisol2,
  cortisol_tri3 = Cortisol3
)
attr(df$cortisol_tri1, "shortDescription") <- "Cortisol levels in mothers during pregnancy during the 1st trimester"
attr(df$cortisol_tri2, "shortDescription") <- "Cortisol levels in mothers during pregnancy during the 2nd trimester"
attr(df$cortisol_tri3, "shortDescription") <- "Cortisol levels in mothers during pregnancy during the 3rd trimester"
# ---- Select ----
# Keep only the renamed, documented columns, in the order used for reporting.
df_tidy <- df %>%
  select(
    age, education_level, nationality, postpartum_depression,
    employed, occupation, sport, pet, dyed_hair,
    first_pregnancy, wanted_pregnancy, pregnancy_method,
    previous_miscarriage, fetus_sex, epds,
    depression_tri1, depression_tri2, depression_tri3,
    cortisol_tri1, cortisol_tri2, cortisol_tri3
  )
# glimpse(df_tidy)
# The CSV export might drop the description attributes.
write.csv(df_tidy, file = "./../../data/tidy_data.csv")
# Save the tidy data frame in R's native format; this way we can always load
# it back as is.
# NOTE(review): this runs before the "numeric mapping information" attribute
# is attached below, so the saved copy will not carry it -- confirm whether
# that is intended.
save(df_tidy, file = "./../../data/tidy_data.Rdata")

# Plain-text key for the numeric codes of the categorical variables. It is
# attached to df_tidy as an attribute and reused as the opening section of the
# rendered codebook.
numeric_mapping <- '# Numeric Mapping
Primer embarazo / First Pregnancy:
0 = No
1 = Yes
Tipo Embarazo / Pregnancy Method:
1 = spontaneously and normally
2 = in vitro fertilization
3 = artificial insemination
Nivel Estudios / level of education
1 = primary
2 = secondary
3 = university students
4 = without studies
Laboral / employee situation
1 = unemployed
2 = full-time job
3 = half-day job
4 = student
5 = work and study
Aspecto Pelo/ Hair Appearance
0 = dyed
1 = natural
Deporte / Sport
0 = No
1 = Yes
Profesión / Profession
1 = unemployment
2 = Head of nurse
3 = Doctor
4 = Nurse
5 = housewife
6 = teacher
7 = management
8 = dental clinic
9 = banking
10 = untrained
11 = psychologist
12 = engineer
Origen / Nationality
1 = Spain
2 = Argentina
3 = Chile
4 = Morocco
5 = Romania
6 = Germany
7 = Russia
8 = Ecuador
9 = Peru
10 = Paraguay
depreposparto / depress post partum
1 = sindepre?
2 = condepre?'
attr(df_tidy, "numeric mapping information") <- numeric_mapping
# datamaid codebook
# Build a dataMaid codebook for df_tidy with HTML output.
dataMaid::makeCodebook(df_tidy, replace=TRUE, codebook=TRUE, output='html')
# Second codebook, built with the codebook package.
df_tmp <- detect_scales(df_tidy) # look for scalings between variables
## Warning in detect_scales(df_tidy): depression_tri items found, but no
## aggregate
## Warning in detect_scales(df_tidy): cortisol_tri items found, but no
## aggregate
# Turn code echo off while the codebook is built; this has to go before the
# codebook() call, and echo = FALSE must be included.
knitr::opts_chunk$set(warning = TRUE, message = TRUE, echo = FALSE)
# This produces the codebook (it takes a long time to knit). It still needs to
# be knitted into its own HTML file without echoing the code chunks.
my_codebook <- codebook(df_tmp)
## Warning in codebook(df_tmp): The variables session, created, ended have
## to be defined for automatic survey repetition detection to work. Set to no
## repetition by default.
# Switch code echo back on for the rest of the document.
knitr::opts_chunk$set(warning = TRUE, message = TRUE, echo = TRUE)
# Write the numeric mapping followed by the generated codebook into a
# temporary R Markdown file, then render that file to group7_codebook.html in
# the current working directory.
fh <- file("tmp.Rmd", open = "wt")
writeLines(text = paste(numeric_mapping, my_codebook), con = fh)
close(fh)
rmarkdown::render(
  input = "tmp.Rmd",
  output_file = "group7_codebook.html",
  output_dir = getwd()
)
##
|
| | 0%
|
|.................................................................| 100%
## ordinary text without R code
##
##
## "C:/Program Files/RStudio/bin/pandoc/pandoc" +RTS -K512m -RTS tmp.utf8.md --to html4 --from markdown+autolink_bare_uris+ascii_identifiers+tex_math_single_backslash --output pandoc3ca055b958d1.html --smart --email-obfuscation none --self-contained --standalone --section-divs --template "C:\R-3.5.1\library\rmarkdown\rmd\h\default.html" --no-highlight --variable highlightjs=1 --variable "theme:bootstrap" --include-in-header "C:\Users\natha\AppData\Local\Temp\RtmpOy2gA5\rmarkdown-str3ca0176e176f.html" --mathjax --variable "mathjax-url:https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"
#### Each column of the data frame df_tidy has an attribute “shortDescription” that can be accessed with attr(df_tidy$variable, "shortDescription")
[1] codebook package:
Preprint Arslan, R. C. (2018). How to automatically generate rich codebooks from study metadata. doi:10.31234/osf.io/5qc6h
Zenodo Arslan, R. C. (2018). Automatic codebooks from survey metadata (2018). URL https://github.com/rubenarslan/codebook. DOI